{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the necessary modules for the protocol\n",
    "import ee as ee\n",
    "ee.Initialize()\n",
    "import pandas as pd\n",
    "from scipy.spatial import ConvexHull\n",
    "from sklearn.decomposition import PCA\n",
    "import numpy as np\n",
    "from itertools import combinations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 549794 entries, 0 to 549793\n",
      "Data columns (total 60 columns):\n",
      " #   Column                                            Non-Null Count   Dtype  \n",
      "---  ------                                            --------------   -----  \n",
      " 0   Aridity_Index                                     549002 non-null  float64\n",
      " 1   CHELSA_Annual_Mean_Temperature                    549518 non-null  float64\n",
      " 2   CHELSA_Annual_Precipitation                       549518 non-null  float64\n",
      " 3   CHELSA_Isothermality                              549518 non-null  float64\n",
      " 4   CHELSA_Max_Temperature_of_Warmest_Month           549518 non-null  float64\n",
      " 5   CHELSA_Mean_Diurnal_Range                         549518 non-null  float64\n",
      " 6   CHELSA_Mean_Temperature_of_Coldest_Quarter        549518 non-null  float64\n",
      " 7   CHELSA_Mean_Temperature_of_Driest_Quarter         549518 non-null  float64\n",
      " 8   CHELSA_Mean_Temperature_of_Warmest_Quarter        549518 non-null  float64\n",
      " 9   CHELSA_Mean_Temperature_of_Wettest_Quarter        549518 non-null  float64\n",
      " 10  CHELSA_Min_Temperature_of_Coldest_Month           549518 non-null  float64\n",
      " 11  CHELSA_Precipitation_Seasonality                  549518 non-null  float64\n",
      " 12  CHELSA_Precipitation_of_Coldest_Quarter           549518 non-null  float64\n",
      " 13  CHELSA_Precipitation_of_Driest_Month              549518 non-null  float64\n",
      " 14  CHELSA_Precipitation_of_Driest_Quarter            549518 non-null  float64\n",
      " 15  CHELSA_Precipitation_of_Warmest_Quarter           549518 non-null  float64\n",
      " 16  CHELSA_Precipitation_of_Wettest_Month             549518 non-null  float64\n",
      " 17  CHELSA_Precipitation_of_Wettest_Quarter           549518 non-null  float64\n",
      " 18  CHELSA_Temperature_Annual_Range                   549518 non-null  float64\n",
      " 19  CHELSA_Temperature_Seasonality                    549518 non-null  float64\n",
      " 20  Depth_to_Water_Table                              549290 non-null  float64\n",
      " 21  EarthEnvCloudCover_MODCF_interannualSD            549519 non-null  float64\n",
      " 22  EarthEnvCloudCover_MODCF_intraannualSD            549519 non-null  float64\n",
      " 23  EarthEnvCloudCover_MODCF_meanannual               549519 non-null  float64\n",
      " 24  EarthEnvTopoMed_Eastness                          549519 non-null  float64\n",
      " 25  EarthEnvTopoMed_Elevation                         549519 non-null  float64\n",
      " 26  EarthEnvTopoMed_Northness                         549519 non-null  float64\n",
      " 27  EarthEnvTopoMed_ProfileCurvature                  549519 non-null  float64\n",
      " 28  EarthEnvTopoMed_Roughness                         549519 non-null  float64\n",
      " 29  EarthEnvTopoMed_Slope                             549519 non-null  float64\n",
      " 30  EarthEnvTopoMed_TopoPositionIndex                 549519 non-null  float64\n",
      " 31  SG_Absolute_depth_to_bedrock                      549519 non-null  float64\n",
      " 32  WorldClim2_SolarRadiation_AnnualMean              549002 non-null  float64\n",
      " 33  WorldClim2_WindSpeed_AnnualMean                   549002 non-null  float64\n",
      " 34  WorldClim2_H2OVaporPressure_AnnualMean            549002 non-null  float64\n",
      " 35  NDVI                                              549514 non-null  float64\n",
      " 36  EVI                                               549514 non-null  float64\n",
      " 37  Lai                                               548689 non-null  float64\n",
      " 38  Fpar                                              548689 non-null  float64\n",
      " 39  Npp                                               543740 non-null  float64\n",
      " 40  Tree_Density                                      547998 non-null  float64\n",
      " 41  PET                                               549276 non-null  float64\n",
      " 42  SG_Clay_Content_0_100cm                           549519 non-null  float64\n",
      " 43  SG_Coarse_fragments_0_100cm                       549519 non-null  float64\n",
      " 44  SG_Sand_Content_0_100cm                           549519 non-null  float64\n",
      " 45  SG_Silt_Content_0_100cm                           549519 non-null  float64\n",
      " 46  SG_Soil_pH_H2O_0_100cm                            549519 non-null  float64\n",
      " 47  LandCoverClass_Cultivated_and_Managed_Vegetation  549519 non-null  float64\n",
      " 48  LandCoverClass_Urban_Builtup                      549519 non-null  float64\n",
      " 49  Human_Disturbance                                 549301 non-null  float64\n",
      " 50  treecover2000                                     549794 non-null  float64\n",
      " 51  Nitrogen                                          545507 non-null  float64\n",
      " 52  CanopyHeight                                      549794 non-null  int64  \n",
      " 53  cropland                                          549035 non-null  float64\n",
      " 54  grazing                                           549035 non-null  float64\n",
      " 55  pasture                                           549035 non-null  float64\n",
      " 56  rangeland                                         549035 non-null  float64\n",
      " 57  Fire_Frequency                                    549794 non-null  int64  \n",
      " 58  cnRatio                                           548627 non-null  float64\n",
      " 59  Cation                                            545507 non-null  float64\n",
      "dtypes: float64(58), int64(2)\n",
      "memory usage: 251.7 MB\n",
      "Composite Bands ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvCloudCover_MODCF_interannualSD', 'EarthEnvCloudCover_MODCF_intraannualSD', 'EarthEnvCloudCover_MODCF_meanannual', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', 'EarthEnvTopoMed_Northness', 'EarthEnvTopoMed_ProfileCurvature', 'EarthEnvTopoMed_Roughness', 'EarthEnvTopoMed_Slope', 'EarthEnvTopoMed_TopoPositionIndex', 'SG_Absolute_depth_to_bedrock', 'WorldClim2_SolarRadiation_AnnualMean', 'WorldClim2_WindSpeed_AnnualMean', 'WorldClim2_H2OVaporPressure_AnnualMean', 'NDVI', 'EVI', 'Lai', 'Fpar', 'Npp', 'Tree_Density', 'PET', 'SG_Clay_Content_0_100cm', 'SG_Coarse_fragments_0_100cm', 'SG_Sand_Content_0_100cm', 'SG_Silt_Content_0_100cm', 'SG_Soil_pH_H2O_0_100cm', 'LandCoverClass_Cultivated_and_Managed_Vegetation', 'LandCoverClass_Urban_Builtup', 'Human_Disturbance', 'treecover2000', 'Nitrogen', 'CanopyHeight', 'cropland', 'grazing', 'pasture', 'rangeland', 'Fire_Frequency', 'cnRatio', 'Cation']\n"
     ]
    }
   ],
   "source": [
    "# Import the data and view a summary of it\n",
    "importedData = pd.read_csv('Sampled data.csv');\n",
    "# define a vector of the variables we are targeting\n",
    "selectedVariables = [\"Variables to select\"]\n",
    "importedData = importedData[selectedVariables]\n",
    "importedData.info()\n",
    "importedData.describe()\n",
    "\n",
    "# Instantiate the composite that was used to sample the points\n",
    "fullCompositeImage = ee.Image(\"Composite to use\")\n",
    "presentCompositeImage = fullCompositeImage.select(selectedVariables)\n",
    "print('Composite Bands',presentCompositeImage.bandNames().getInfo())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input the proportion of variance that you would like to cover when running the script\n",
    "propOfVariance = 90"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def assessExtrapolation(importedData, compositeImage, propOfVariance):\n",
    "    \n",
    "    # Excise the columns of interest from the data frame\n",
    "    variablesOfInterest = importedData.dropna().drop([], axis=1)\n",
    "    \n",
    "    # Compute the mean and standard deviation of each band, then standardize the point data\n",
    "    meanVector = variablesOfInterest.mean()\n",
    "    stdVector = variablesOfInterest.std()\n",
    "    standardizedData = (variablesOfInterest-meanVector)/stdVector\n",
    "    \n",
    "    # Then standardize the composite from which the points were sampled\n",
    "    meanList = meanVector.tolist()\n",
    "    stdList = stdVector.tolist()\n",
    "    bandNames = list(meanVector.index)\n",
    "    meanImage = ee.Image(meanList).rename(bandNames)\n",
    "    stdImage = ee.Image(stdList).rename(bandNames)\n",
    "    standardizedImage = compositeImage.subtract(meanImage).divide(stdImage)\n",
    "    \n",
    "    # Run a PCA on the point samples\n",
    "    pcaOutput = PCA()\n",
    "    pcaOutput.fit(standardizedData)\n",
    "    \n",
    "    # Save the cumulative variance represented by each PC\n",
    "    cumulativeVariance = np.cumsum(np.round(pcaOutput.explained_variance_ratio_, decimals=4)*100)\n",
    "    \n",
    "    # Make a list of PC names for future organizational purposes\n",
    "    pcNames = ['PC'+str(x) for x in range(1,variablesOfInterest.shape[1]+1)]\n",
    "    \n",
    "    # Get the PC loadings as a data frame\n",
    "    loadingsDF = pd.DataFrame(pcaOutput.components_,columns=[str(x)+'_Loads' for x in bandNames],index=pcNames)\n",
    "    \n",
    "    # Get the original data transformed into PC space\n",
    "    transformedData = pd.DataFrame(pcaOutput.fit_transform(standardizedData,standardizedData),columns=pcNames)\n",
    "    \n",
    "    # Make principal components images, multiplying the standardized image by each of the eigenvectors\n",
    "    # Collect each one of the images in a single image collection;\n",
    "    \n",
    "    # First step: make an image collection wherein each image is a PC loadings image\n",
    "    listOfLoadings = ee.List(loadingsDF.values.tolist());\n",
    "    eePCNames = ee.List(pcNames)\n",
    "    zippedList = eePCNames.zip(listOfLoadings)\n",
    "    def makeLoadingsImage(zippedValue):\n",
    "        return ee.Image.constant(ee.List(zippedValue).get(1)).rename(bandNames).set('PC',ee.List(zippedValue).get(0))\n",
    "    loadingsImageCollection = ee.ImageCollection(zippedList.map(makeLoadingsImage))\n",
    "    \n",
    "    # Second step: multiply each of the loadings image by the standardized image and reduce it using a \"sum\"\n",
    "    # to finalize the matrix multiplication\n",
    "    def finalizePCImages(loadingsImage):\n",
    "        return ee.Image(loadingsImage).multiply(standardizedImage).reduce('sum').rename([ee.String(ee.Image(loadingsImage).get('PC'))]).set('PC',ee.String(ee.Image(loadingsImage).get('PC')))\n",
    "    principalComponentsImages = loadingsImageCollection.map(finalizePCImages)\n",
    "    \n",
    "    # Choose how many principal components are of interest in this analysis based on amount of\n",
    "    # variance explained\n",
    "    numberOfComponents = sum(i < propOfVariance for i in cumulativeVariance)+1\n",
    "    print('Number of Principal Components being used:',numberOfComponents)\n",
    "    \n",
    "    # Compute the combinations of the principal components being used to compute the 2-D convex hulls\n",
    "    tupleCombinations = list(combinations(list(pcNames[0:numberOfComponents]),2))\n",
    "    print('Number of Combinations being used:',len(tupleCombinations))\n",
    "    \n",
    "    # Generate convex hulls for an example of the principal components of interest\n",
    "    cHullCoordsList = list()\n",
    "    for c in tupleCombinations:\n",
    "        firstPC = c[0]\n",
    "        secondPC = c[1]\n",
    "        outputCHull = ConvexHull(transformedData[[firstPC,secondPC]])\n",
    "        listOfCoordinates = transformedData.loc[outputCHull.vertices][[firstPC,secondPC]].values.tolist()\n",
    "        flattenedList = [val for sublist in listOfCoordinates for val in sublist]\n",
    "        cHullCoordsList.append(flattenedList)\n",
    "    \n",
    "    # Reformat the image collection to an image with band names that can be selected programmatically\n",
    "    pcImage = principalComponentsImages.toBands().rename(pcNames)\n",
    "    \n",
    "    # Generate an image collection with each PC selected with it's matching PC\n",
    "    listOfPCs = ee.List(tupleCombinations)\n",
    "    listOfCHullCoords = ee.List(cHullCoordsList)\n",
    "    zippedListPCsAndCHulls = listOfPCs.zip(listOfCHullCoords)\n",
    "    \n",
    "    def makeToClassifyImages(zippedListPCsAndCHulls):\n",
    "        imageToClassify = pcImage.select(ee.List(zippedListPCsAndCHulls).get(0)).set('CHullCoords',ee.List(zippedListPCsAndCHulls).get(1))\n",
    "        classifiedImage = imageToClassify.rename('u','v').classify(ee.Classifier.spectralRegion([imageToClassify.get('CHullCoords')]))\n",
    "        return classifiedImage\n",
    "    classifedImages = ee.ImageCollection(zippedListPCsAndCHulls.map(makeToClassifyImages))\n",
    "    finalImageToExport = classifedImages.sum().divide(ee.Image.constant(len(tupleCombinations)))\n",
    "    \n",
    "    return finalImageToExport\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of Principal Components being used: 21\n",
      "Number of Combinations being used: 210\n"
     ]
    }
   ],
   "source": [
    "# Apply the function\n",
    "finalImageToExport = assessExtrapolation(importedData, presentCompositeImage, propOfVariance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'type': 'Image', 'bands': [{'id': 'classification', 'data_type': {'type': 'PixelType', 'precision': 'float', 'min': -43920820900200448, 'max': 43920820900200448}, 'crs': 'EPSG:4326', 'crs_transform': [1, 0, 0, 0, 1, 0]}]}\n"
     ]
    }
   ],
   "source": [
    "print(finalImageToExport.getInfo())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the export boundary\n",
    "unboundedGeo = ee.Geometry.Polygon([-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88], None, False);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'state': 'READY',\n",
       " 'description': 'Wood_Density_Representativeness_IntExt_Export',\n",
       " 'creation_timestamp_ms': 1607434137149,\n",
       " 'update_timestamp_ms': 1607434137149,\n",
       " 'start_timestamp_ms': 0,\n",
       " 'task_type': 'EXPORT_IMAGE',\n",
       " 'id': 'A6MD3Q5HEJUVHG3XUEIEPREB',\n",
       " 'name': 'projects/earthengine-legacy/operations/A6MD3Q5HEJUVHG3XUEIEPREB'}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Export the image to test it\n",
    "myTask = ee.batch.Export.image.toAsset(\n",
    "    image = finalImageToExport,\n",
    "    description = 'Representativeness_IntExt_Export',\n",
    "    assetId = 'direction to go with file name',\n",
    "    region = unboundedGeo,\n",
    "    maxPixels = 1e13,\n",
    "    crs = 'EPSG:4326',\n",
    "    crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90]\n",
    ")\n",
    "# start the task and show the status on Google earth engine UI\n",
    "myTask.start()\n",
    "myTask.status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'state': 'READY',\n",
       " 'description': 'Wood_Density_IntExt_Export_to_Drive',\n",
       " 'creation_timestamp_ms': 1607435793041,\n",
       " 'update_timestamp_ms': 1607435793041,\n",
       " 'start_timestamp_ms': 0,\n",
       " 'task_type': 'EXPORT_IMAGE',\n",
       " 'id': '3FBWWJCOXSAMQQVQCXRMT6IK',\n",
       " 'name': 'projects/earthengine-legacy/operations/3FBWWJCOXSAMQQVQCXRMT6IK'}"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# export to google drive\n",
    "driveTask = ee.batch.Export.image.toDrive(\n",
    "    image = finalImageToExport,\n",
    "    description = 'IntExt_Export_to_Drive',\n",
    "    # fileNamePrefix = 'Wood_Density_Representativeness_IntExt_Map',\n",
    "    region = unboundedGeo,\n",
    "    crs = 'EPSG:4326',\n",
    "    crsTransform= [0.008333333333333333,0,-180,0,-0.008333333333333333,90],\n",
    "    maxPixels = 1e13\n",
    ")\n",
    "driveTask.start()\n",
    "driveTask.status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
